library(tidyverse)
library(tigris)
library(censusapi)
library(sf)
library(mapview)
library(plotly)
library(leaflet)
library(lehdr)
library(ggplot2)

options(
  tigris_class = "sf",
  tigris_use_cache = T 
)

Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")

Mapping distribution of essential workers by block group for San Jose

Using the LODES dataset and the list of essential workers to map the distribution of essential workers by block group in San Jose

First get the block groups in San Jose

scc_blockgroups <- block_groups("CA","Santa Clara", cb=F, progress_bar=F)

# Use tracts sent to us by San Jose
dir <- "pCloud Drive/SFBI/Data Library"
sj_tracts <- st_read(file.path(dir,"San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp")) %>% 
  st_as_sf() %>% 
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CSJ_Census_Tracts' from data source `/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/San_Jose/CSJ_Census_Tracts/CSJ_Census_Tracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 219 features and 9 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## CRS:            2227
sj_citycouncil_disticts <- st_read(file.path(dir, "San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp")) %>% 
  st_as_sf() %>% 
  st_transform(st_crs(scc_blockgroups))
## Reading layer `CITY_COUNCIL_DISTRICTS' from data source `/Users/spencerrobinson/pCloud Drive/SFBI/Data Library/San_Jose/City Council Districts/CITY_COUNCIL_DISTRICTS.shp' using driver `ESRI Shapefile'
## Simple feature collection with 10 features and 7 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: 6112856 ymin: 1869687 xmax: 6255982 ymax: 1996555
## CRS:            2227
# from code written by others to get SJ blockgroups
sj_blockgroups <- 
  scc_blockgroups %>% 
  st_centroid() %>% 
  st_join(sj_tracts, left = F) %>% 
  st_join(sj_citycouncil_disticts%>% dplyr::select(DISTRICTS)) %>% 
  mutate(
    DISTRICTS = DISTRICTS %>% factor(levels = c("1","2","3","4","5","6","7","8","9","10"))
  ) %>% 
  st_set_geometry(NULL) %>% 
  left_join(scc_blockgroups%>% dplyr::select(GEOID), by = "GEOID") %>% 
  st_as_sf() %>% 
  dplyr::select(GEOID, DISTRICTS)

# the spatial join leaves off two blockgroups which are touching district 9. The following code assigns those to district 9
sj_blockgroups$DISTRICTS[is.na(sj_blockgroups$DISTRICTS)] <- 9

sj_boundary <- 
  places("CA", cb=F, progress_bar=F) %>% 
  filter(NAME == "San Jose")

Next obtain the distribution of workers from the LODES dataset

# define the LODES categories, from https://lehd.ces.census.gov/data/lodes/LODES7/LODESTechDoc7.3.pdf
LODES_variable <- c('CNS01', 'CNS02', 'CNS03', 'CNS04', 'CNS05', 'CNS05', 'CNS05', 'CNS06', 'CNS07', 'CNS07', 'CNS08', 'CNS08', 'CNS09', 'CNS10', 'CNS11', 'CNS12', 'CNS13', 'CNS14', 'CNS15', 'CNS16', 'CNS17', 'CNS18', 'CNS19', 'CNS20')
LODES_NAICS <- c('11', '21', '22', '23', '31', '32', '33', '42', '44', '45', '48', '49', '51', '52', '53', '54', '55', '56', '61', '62', '71', '72', '81', '92')
LODES_keys <- data.frame(LODES_variable, LODES_NAICS)

# get the LODES data
sj_rac <- 
  grab_lodes(
    state = "ca", 
    year = 2017, 
    lodes_type = "rac", 
    job_type = "JT01",
    segment = "S000", 
    state_part = "main",
    agg_geo = "bg"
  ) %>% 
  left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>% 
  filter(!is.na(DISTRICTS)) %>%
  dplyr::select(-c(contains("CE"),contains("CA"),contains("CR"), contains("CT"),contains("CS"),contains("CD")))

Get QWI Data which tells us number of works in each 4 digit NAICS code

# Get distribution of jobs in Santa Clara County
#Get NAICS codes
label_industry <-
  read_csv(file.path(dir,"NAICS/label_industry.csv"))
#GetQWI data
qwi_scc <- NULL
for(years in 2017){
  qwi<-
    getCensus(
      name = "timeseries/qwi/sa",
      region = "county:085",
      regionin = "state:06",
      vars = c("EmpS","EarnS","industry","ind_level"),
      time = years
    ) %>%
    filter(ind_level == 4) %>%
    mutate(
      year = substr(time,1,4)
    ) %>%
    left_join(label_industry, by= "industry") %>%
    group_by(year,industry,label) %>%
    summarize(
      EmpS = round(mean(as.numeric(EmpS), na.rm = TRUE),0),
      EarnS = round(mean(as.numeric(EarnS), na.rm = TRUE),0)
    ) %>%
    filter(!is.na(EmpS) & EmpS != 0)
  qwi_scc<-
    rbind(qwi_scc,qwi)
}
arrange(qwi, desc(EmpS))
## # A tibble: 268 x 5
## # Groups:   year, industry [268]
##    year  industry label                                               EmpS EarnS
##    <chr> <chr>    <chr>                                              <dbl> <dbl>
##  1 2017  5415     Computer Systems Design and Related Services       74335 15200
##  2 2017  7225     Restaurants and Other Eating Places                54063  2217
##  3 2017  5191     Other Information Services                         45339 34325
##  4 2017  3341     Computer and Peripheral Equipment Manufacturing    42593 21186
##  5 2017  3344     Semiconductor and Other Electronic Component Manu… 38376 19610
##  6 2017  6111     Elementary and Secondary Schools                   36927  5059
##  7 2017  6221     General Medical and Surgical Hospitals             31975  8206
##  8 2017  6241     Individual and Family Services                     24505  1623
##  9 2017  6113     Colleges, Universities, and Professional Schools   23194  8034
## 10 2017  5112     Software Publishers                                21594 26285
## # … with 258 more rows

Get Essential Deisgnations

#Here I will use the Vulnerability Team's California Essential Business NAICS code list according to California State Public Health Officer's Report 
dir <- "/Users/spencerrobinson/pCloud Drive/SFBI/Data Library"
CA_essential <- read_csv(file.path(dir,'Essential Designations/ca_essential_designations.csv'))
CA_essential<- CA_essential %>%
  mutate(`CA Essential` = replace_na(`CA Essential`, FALSE)) 
CA_essential$`CA Essential` <-  as.logical(CA_essential$`CA Essential`) 

#Here I use Delaware's Essential Business NAICS code list (only goes to 4 digit)
DE_essential <- read_csv(file.path(dir, 'Essential Designations/delaware_essential_designations.csv'))

Method 1: Use the QWI data to find the distribution of workers in NAICS 4-digit codes in Santa Clara County, and use this to weight the contributions of each 2-digit code.

#CA Fraction Employed; CA designations
CA_essential_weighted_grouped_ca1 <- CA_essential %>% 
  mutate(NAICS = str_sub(NAICS, 1,4)) %>% 
  full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
  mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
  full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>% 
  mutate(EmpS = replace_na(EmpS, 0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
  group_by(LODES_variable, `CA Essential`) %>% 
  summarize(totalWorkers = sum(EmpS)) %>% # get number of 4 digit codes within each 2 digit code that are essential and nonessential and total workers essential/nonessential 
  ungroup() %>% 
  spread(`CA Essential`, totalWorkers, fill = 0) %>%
  rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>% 
  mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount) %>% 
  mutate(df = "CA")

#CA Fraction Employed; DE designations
CA_essential_weighted_grouped_de1 <- DE_essential %>% 
  mutate(NAICS = str_sub(NAICS, 1,4)) %>% 
  full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
  mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
  full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>% 
  mutate(EmpS = replace_na(EmpS, 0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
  group_by(LODES_variable, Essential) %>% 
  summarize(totalWorkers = sum(EmpS)) %>% # get number of 4 digit codes within each 2 digit code that are essential and nonessential and total workers essential/nonessential 
  mutate(Essential = replace_na(Essential, TRUE)) %>% #CNS20 or NAICS code 92 isn't in the DE dataset and is given as NA, but it is public administration and we assume  essential
  ungroup() %>% 
  spread(Essential, totalWorkers, fill = 0) %>%
  rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>% 
  mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount) %>% 
  mutate(df = "DE")

#Let's compare the fraction essential for California versus Delaware 
compare_df <- bind_rows(CA_essential_weighted_grouped_ca1, CA_essential_weighted_grouped_de1)
compare_graph <- ggplot(data = compare_df, aes(x = LODES_variable, y = fracEssential, fill = df))+
  geom_bar(stat = "identity", position = position_dodge())+
  ggtitle(" Fraction Employed in Santa Clara; Comparing Delaware and California Designations")+
  theme_minimal()+
  theme(
plot.title = element_text(size=14, face="bold.italic"),
axis.text.x =  element_text(angle = 90, size=14),
axis.title.y = element_text(size=14)
)
compare_graph

# get essential breakdown in San Jose - CA designations
sj_rac_essential_ca1 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
  left_join(CA_essential_weighted_grouped_ca1 %>% dplyr::select(fracEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>% 
  mutate(numEssential = fracEssential * Number) %>%
  group_by(h_bg, C000) %>%
  summarize(totalEssential = sum(numEssential)) %>%
  ungroup() %>%
  mutate(fracEssential = round((totalEssential / C000)*100, 1))

sj_rac_essential_de1 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
  left_join(CA_essential_weighted_grouped_de1 %>% dplyr::select(fracEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>% 
  mutate(numEssential = fracEssential * Number) %>%
  group_by(h_bg, C000) %>%
  summarize(totalEssential = sum(numEssential)) %>%
  ungroup() %>%
  mutate(fracEssential = round((totalEssential / C000)*100, 1))

Map of method 1 using California Essential Business Designations

# set up palette
blue_pal <- colorNumeric(
  palette = "Blues",
  domain = 
    c(sj_rac_essential_ca1 %>% 
        pull(fracEssential) %>% 
        unique())
)

ca_map1 <- leaflet(data = sj_rac_essential_ca1) %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = 
      sj_rac_essential_ca1 %>% 
      left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~blue_pal(fracEssential),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(fracEssential,"% of workers that are essential by block group "),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  )%>% 
  addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Workers in Essential Bus.")
ca_map1

Map of method 1 using Delaware Essential Business Designations

red_pal <- colorNumeric(
  palette = "Reds",
  domain = 
    c(sj_rac_essential_de1 %>% 
        pull(fracEssential) %>% 
        unique())
)

de_map1 <- leaflet(data = sj_rac_essential_de1) %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = 
      sj_rac_essential_de1 %>% 
      left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~red_pal(fracEssential),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(fracEssential,"% of workers that are essential by block group "),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
  ) %>% 
  addLegend(pal = red_pal, values = ~fracEssential, opacity = 1, title = "% Workers in Essential Bus.")
de_map1 

Method 2: Use fraction of 6-digit codes considered essential to create fraction of essential codes for each of the 4-digit codes. Then use Method 1 to determine fraction of essential workers for each 2-digit code.

#CA Fraction Employed; CA designations
CA_essential_fraction <- CA_essential %>% 
  mutate(NAICS = str_sub(NAICS, 1,6)) %>% 
  mutate(NAICS_4_dig = str_sub(NAICS, 1,4)) %>% 
  group_by(NAICS_4_dig, `CA Essential`) %>% 
  summarize(NAICS_factor = n()) %>% 
  spread(`CA Essential`, NAICS_factor) %>% 
  mutate(`FALSE` = replace_na(`FALSE`, 0), `TRUE` = replace_na(`TRUE`, 0)) %>% 
  mutate(total = `TRUE` + `FALSE`, fraction = `TRUE` / total) 
CA_essential_fraction <- CA_essential_fraction[,-(2:4)]

#CA Fraction Employed; CA designations
CA_essential_weighted_grouped_ca2 <- CA_essential %>% 
  mutate(NAICS = str_sub(NAICS, 1,4)) %>% 
  full_join(CA_essential_fraction, by = c('NAICS' = 'NAICS_4_dig')) %>% 
  full_join(qwi_scc, by = c('NAICS' = 'industry')) %>%
  mutate(NAICS_2_dig = substr(NAICS, 1, 2)) %>%
  full_join(LODES_keys, by = c('NAICS_2_dig' = 'LODES_NAICS')) %>%
  mutate(EmpS = replace_na(EmpS, 0)) %>% # assume if not listed it is not essential, and if no employees listed that the number is zero
  {if(TRUE) mutate(., EmpS = fraction*EmpS) else . } %>%  #Multiply number of essential employees by fraction of NAICS codes essential 
    group_by(LODES_variable, `CA Essential`) %>% 
  summarize(totalWorkers = sum(EmpS)) %>%
  ungroup() %>% 
   spread(`CA Essential`,totalWorkers, fill = 0) %>%
  rename(c("Essential" = "TRUE", "Nonessential" = "FALSE")) %>% 
  mutate(totalCount = Essential + Nonessential, fracEssential = Essential / totalCount) %>%
  na.omit() %>% 
  mutate(df = "CA2")
  
compare_df <- bind_rows(CA_essential_weighted_grouped_ca1, CA_essential_weighted_grouped_ca2)
compare_graph <- ggplot(data = compare_df, aes(x = LODES_variable, y = fracEssential, fill = df))+
  geom_bar(stat = "identity", position = position_dodge())+
  ggtitle("Fraction Employed in Santa Clara; Comparing Delaware and California Designations")+
  theme_minimal()+
  theme(
plot.title = element_text(size=14, face="bold.italic"),
axis.text.x =  element_text(angle = 90, size=14),
axis.title.y = element_text(size=14)
)
compare_graph

# get essential breakdown in San Jose - CA designations
sj_rac_essential_ca2 <- sj_rac %>% gather("Category", "Number", CNS01:CNS20) %>%
  left_join(CA_essential_weighted_grouped_ca2 %>% dplyr::select(fracEssential, LODES_variable), by = c("Category" = "LODES_variable")) %>% 
  mutate(numEssential = fracEssential * Number) %>%
  group_by(h_bg, C000) %>%
  summarize(totalEssential = sum(numEssential, na.rm = T)) %>%
  ungroup() %>%
  mutate(fracEssential = round((totalEssential / C000)*100, 1))

Map of method 2 using California Essential Business Designations

# set up palette
blue_pal <- colorNumeric(
  palette = "Blues",
  domain = 
    c(sj_rac_essential_ca2 %>% 
        pull(fracEssential) %>% 
        unique())
)

ca_map2 <- leaflet(data = sj_rac_essential_ca2) %>% 
  addProviderTiles(providers$CartoDB.Positron) %>% 
  addPolygons(
    data = 
      sj_rac_essential_ca2 %>% 
      left_join(sj_blockgroups, by = c("h_bg" = "GEOID")) %>%
      st_as_sf() %>% st_set_crs(4326),
    fillColor = ~blue_pal(fracEssential),
    color = "white",
    weight = 0.25,
    fillOpacity = 0.5,
    label = ~paste0(fracEssential,"%"),
    highlightOptions = highlightOptions(
      weight = 2,
      opacity = 1
    )
) %>% 
  addLegend(pal = blue_pal, values = ~fracEssential, opacity = 1, title = "% Workers in Essential Bus.")

ca_map2